{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "provenance": [],
   "include_colab_link": true
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3 (ipykernel)",
   "language": "python"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
   },
   "source": [
    "<a href=\"https://colab.research.google.com/github/IgorLytkin/MyDataSpell/blob/main/%D0%9B%D0%B5%D0%BA%D1%86%D0%B8%D1%8F_6_7.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Линейная регресссия"
   ],
   "metadata": {
    "id": "SjLQtZRUhtQv"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "**В наборе данных о диабете содержатся измерения, проведенные у 442 пациентов с сахарным диабетом:**\n",
    "\n",
    "10 базовых переменных (функций):\n",
    "1. age - возраст в годах\n",
    "\n",
    "1.   sex - мужчина или женщина\n",
    "\n",
    "2. bmi - индекс массы тела\n",
    "3. bp - среднее кровяное давление\n",
    "4. s1 - TC: общий уровень холестерина в сыворотке крови\n",
    "5. s2 - LDL: липопротеины низкой плотности\n",
    "6. s3 - HDL: липопротеины высокой плотности\n",
    "7. s4 - TCH: общий холестерин\n",
    "8. s5 - LTG: возможное значение уровня триглицеридов в сыворотке крови\n",
    "9. s6 - GLU: уровень сахара в крови"
   ],
   "metadata": {
    "id": "lbVN8M26dZty"
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "id": "ZDR0NDZYh2nO"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "from sklearn import datasets, linear_model\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "\n",
    "# Load the diabetes dataset\n",
    "diabetes_X, diabetes_y = datasets.load_diabetes(as_frame=True, return_X_y=True)\n",
    "diabetes_X.head()"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 206
    },
    "id": "U0WE99rSPqfO",
    "outputId": "ae9da285-d665-4d2b-86bb-ca0ca746b087",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "diabetes_X.shape"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "x8GDR1VFeHP0",
    "outputId": "be17e945-6932-43a5-be4c-7600164fe315",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)"
   ],
   "metadata": {
    "id": "OJPx0h1neXbl",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "diabetes_X[0]"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Hj3UORYkIKlr",
    "outputId": "a5c702a8-c322-4d84-ce16-8ad0b7871f60",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "# Use only one feature\n",
    "diabetes_X = diabetes_X[:, np.newaxis, 2]\n",
    "\n",
    "# Split the data into training/testing sets\n",
    "diabetes_X_train = diabetes_X[:-20]\n",
    "diabetes_X_test = diabetes_X[-20:]\n",
    "\n",
    "# Split the targets into training/testing sets\n",
    "diabetes_y_train = diabetes_y[:-20]\n",
    "diabetes_y_test = diabetes_y[-20:]\n",
    "\n",
    "# Create linear regression object\n",
    "regr = linear_model.LinearRegression()\n",
    "\n",
    "# Train the model using the training sets\n",
    "regr.fit(diabetes_X_train, diabetes_y_train)\n",
    "\n",
    "# Make predictions using the testing set\n",
    "diabetes_y_pred = regr.predict(diabetes_X_test)\n",
    "\n",
    "# The coefficients\n",
    "print(\"Coefficients: \\n\", regr.coef_)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\" % mean_squared_error(diabetes_y_test, diabetes_y_pred))\n",
    "# The coefficient of determination: 1 is perfect prediction\n",
    "print(\"Coefficient of determination: %.2f\" % r2_score(diabetes_y_test, diabetes_y_pred))\n",
    "\n",
    "# Plot outputs\n",
    "plt.scatter(diabetes_X_test, diabetes_y_test, color=\"black\")\n",
    "plt.plot(diabetes_X_test, diabetes_y_pred, color=\"blue\", linewidth=3)\n",
    "\n",
    "plt.xticks(())\n",
    "plt.yticks(())\n",
    "\n",
    "plt.show()"
   ],
   "metadata": {
    "id": "gg_ZoYhHRJED",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 475
    },
    "outputId": "6851f80b-f94c-464c-d3eb-e84368e7180a",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Логистическая регрессия"
   ],
   "metadata": {
    "id": "3dQi97lEh4rT"
   }
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "import seaborn as sns"
   ],
   "metadata": {
    "id": "qByOpUMjihTc",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "df = pd.read_csv('diabetes2.csv')\n",
    "df.head()"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 206
    },
    "id": "CnaSpa2Wh81w",
    "outputId": "7bd360e2-30f6-49f2-982d-2b8a105573bc",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "df.info()"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "0QmgU1BuiTAY",
    "outputId": "71c2db7e-2a61-4690-cc37-9e1e569b87a2",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='viridis')"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 614
    },
    "id": "wb-4Le93iXtu",
    "outputId": "0bae3539-5481-4bf7-ee3e-45c38645fbe6",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "sns.countplot(x='Outcome',data=df)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 466
    },
    "id": "p7n-ek7hidpH",
    "outputId": "49a2c538-0ce4-493f-eff6-955b2ce1a9bd",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "sns.distplot(df['Age'].dropna(),kde=True)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 657
    },
    "id": "WppL1Nnri0J0",
    "outputId": "faa27d43-61cc-4c07-9db9-5e658eb06dd6",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "sns.heatmap(df.corr())"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 620
    },
    "id": "BYUnSx9Ui4Nb",
    "outputId": "67c1fc02-da60-45e5-d7cb-023af6eeb1a4",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "x = df.drop('Outcome',axis=1)\n",
    "y = df['Outcome']"
   ],
   "metadata": {
    "id": "Xy7lJ207jU1a",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "from sklearn.model_selection import train_test_split"
   ],
   "metadata": {
    "id": "JY18uVU5jXLh",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=101)"
   ],
   "metadata": {
    "id": "sfxY6lOgkC04",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "from sklearn.linear_model import LogisticRegression"
   ],
   "metadata": {
    "id": "dnDjdwzQkFdi",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "logmodel = LogisticRegression()"
   ],
   "metadata": {
    "id": "gkglFZsAkG6M",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "logmodel.fit(x_train,y_train)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 213
    },
    "id": "RtmIlbPZkK4k",
    "outputId": "727892e1-7ea1-449e-9c63-c82e76372da4",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "y_pred = logmodel.predict(x_test)"
   ],
   "metadata": {
    "id": "5I9devahk6fn",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "from sklearn.metrics import f1_score\n",
    "from sklearn.metrics import precision_recall_fscore_support\n",
    "\n",
    "f1_score(y_test, y_pred)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "vmtsfY7dkMQy",
    "outputId": "1d424ff5-9e9e-4ea0-ed78-801cd99c5d72",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [
    "precision_recall_fscore_support(y_test, y_pred)"
   ],
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "fYMPpD8DksQK",
    "outputId": "c370cbe2-b831-4d9b-e65f-e1bad2e686a7",
    "is_executing": true
   },
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "source": [],
   "metadata": {
    "id": "ykuV3m4nlakk",
    "is_executing": true
   },
   "execution_count": null,
   "outputs": []
  }
 ]
}
